from pyspark.sql import SparkSession

if __name__ == '__main__':
    # Build the SparkSession object.
    # BUG FIX: the original passed "local[*]" to appName(); "local[*]" is a
    # master URL, not an application name. Without a master set, running the
    # script standalone fails with "A master URL must be set".
    spark = SparkSession.builder. \
        master("local[*]"). \
        appName("stu_score_sql"). \
        config("spark.sql.shuffle.partitions", "4"). \
        getOrCreate()
    # master  -> where to run (local mode, all cores)
    # appName -> sets the program name shown in the Spark UI
    # config  -> sets common properties (here: shuffle partition count)
    # getOrCreate -> creates (or reuses) the SparkSession object

    # Read the CSV without a header row, then assign column names.
    df = spark.read.csv("../data/sql/stu_score.txt", header=False)
    df2 = df.toDF("id", "name", "score")
    df2.printSchema()
    df2.show(truncate=False)
    df2.createTempView("score")

    # SQL style
    spark.sql("""SELECT * FROM score WHERE name ='语文' LIMIT 5""").show()

    # DSL style
    df2.where("name = '语文'").limit(5).show()

    # Release cluster resources cleanly.
    spark.stop()
